# ==============================================================================
# file name: 3-create-analysis-data.R
# author: Bernhard Clemm
# date: Jun 26, 2025
# purpose: merge surveys and summarized browsing data into analysis datasets
#          FB and LU datasets will be in long format (one row per person-wave)
# ==============================================================================

# Clear user-created objects from the calling environment before starting.
# NOTE(review): this does not detach packages or reset options, so it is not
# a substitute for running the script in a fresh R session.
rm(list = ls())

# Attach the packages this script relies on, so it also runs in a fresh
# session: dplyr for the pipe, joins, select/mutate/distinct/rename; tidyr
# for pivot_longer().
library(dplyr)
library(tidyr)

# FACEBOOK =====================================================================

## Browsing data ####
# Three summarized browsing tables; each is joined on person_id below.
people_visits_fb <- read.csv("data/browsing_summarized/people_visits_FB.csv")
people_prof_fb <- read.csv("data/browsing_summarized/people_prof_FB.csv")
people_repeated_fb <- read.csv(
  "data/browsing_summarized/people_repeated_FB.csv"
)

## Survey data ####
survey_fb <- read.csv("data/surveys_processed/survey_FB_donors.csv")

# Reshape wide -> long: every column whose name ends in "_1" or "_2" is split
# into its stem (which becomes the column name via ".value") and the wave
# number. Columns without such a suffix are repeated on every wave row.
survey_fb_long <- survey_fb %>%
  pivot_longer(
    cols = matches(".*_[12]$"),
    names_to = c(".value", "wave"),
    names_pattern = "^(.*)_([12])$"
  ) %>%
  # the wave suffix is captured as text; convert it to integer because it is
  # used as a join key below
  mutate(wave = as.integer(wave))

## Merge ####
merged_fb <- people_visits_fb %>%
  left_join(people_prof_fb, by = "person_id") %>%
  # the `dataset` column of the repeated-visits table is dropped before joining
  left_join(select(people_repeated_fb, -dataset), by = "person_id") %>%
  # full join to include subjects who did W2 survey but did not donate in W2
  full_join(survey_fb_long, by = c("person_id", "wave")) %>%
  # keep only the first row per person_id / weight / wave combination
  distinct(person_id, weight, wave, .keep_all = TRUE) %>%
  mutate(dataset = "Facebook", person_id = as.character(person_id))

# spell out FALSE: the alias `F` is an ordinary variable that can be reassigned
write.csv(merged_fb, "data/analysis_FB.csv", row.names = FALSE)

# LUCID =====================================================================

## Browsing data ####
# Three summarized browsing tables; each is joined on person_id below.
people_visits_lu <- read.csv("data/browsing_summarized/people_visits_LU.csv")
people_prof_lu <- read.csv("data/browsing_summarized/people_prof_LU.csv")
people_repeated_lu <- read.csv(
  "data/browsing_summarized/people_repeated_LU.csv"
)

## Survey data ####
survey_lu <- read.csv("data/surveys_processed/survey_LU_donors.csv")

# Reshape wide -> long: every column whose name ends in "_w1", "_w2" or "_w3"
# is split into its stem (which becomes the column name via ".value") and the
# wave number. Columns without such a suffix are repeated on every wave row.
survey_lu_long <- survey_lu %>%
  pivot_longer(
    cols = matches(".*_w[1-3]$"),
    names_to = c(".value", "wave"),
    names_pattern = "^(.*)_w([1-3])$"
  ) %>%
  # the wave suffix is captured as text; convert it to integer because it is
  # used as a join key below
  mutate(wave = as.integer(wave))

## Merge ####
merged_lu <- people_visits_lu %>%
  left_join(people_prof_lu, by = "person_id") %>%
  # the `dataset` column of the repeated-visits table is dropped before joining
  left_join(select(people_repeated_lu, -dataset), by = "person_id") %>%
  # left join: survey rows without a matching browsing row are not added
  left_join(survey_lu_long, by = c("person_id", "wave")) %>%
  mutate(dataset = "Lucid", person_id = as.character(person_id))

# spell out FALSE: the alias `F` is an ordinary variable that can be reassigned
write.csv(merged_lu, "data/analysis_LU.csv", row.names = FALSE)

# YOUGOV =======================================================================

## Browsing data ####
# The two "_anon" files identify people by `id_anon`; it is renamed to
# person_id so that all tables share one join key.
people_visits_yg <- read.csv(
  "data/browsing_summarized/people_visits_YG_anon.csv"
) %>%
  rename(person_id = id_anon)
# NOTE(review): this file has no "_anon" suffix and is not renamed, so it must
# already contain a person_id column on the same scale as id_anon -- confirm.
people_prof_yg <- read.csv("data/browsing_summarized/people_prof_YG.csv")
people_repeated_yg <- read.csv(
  "data/browsing_summarized/people_repeated_YG_anon.csv"
) %>%
  rename(person_id = id_anon)

## Survey data ####
# Used as read: no reshaping, and joined on person_id only.
survey_yg <- read.csv("data/surveys_processed/survey_YG_donors.csv")

## Merge ####
merged_yg <- people_visits_yg %>%
  left_join(people_prof_yg, by = "person_id") %>%
  # the `dataset` column of the repeated-visits table is dropped before joining
  left_join(select(people_repeated_yg, -dataset), by = "person_id") %>%
  left_join(survey_yg, by = "person_id") %>%
  mutate(dataset = "YouGov", person_id = as.character(person_id))

# spell out FALSE: the alias `F` is an ordinary variable that can be reassigned
write.csv(merged_yg, "data/analysis_YG.csv", row.names = FALSE)
